Data preparation
My first idea was to compare opioid deaths and alcohol deaths as
alcohol is the most common and popular drug of choice, so popular and
mainstream in fact that most people do not consider it a ‘drug’. I
#Creating line graph for rate of opioid and alcohol related deaths over time 1990-2019 in the USA
#load relevant packages
library(dplyr)
library(here)
library(tidyverse)
library(readr)
library(ggplot2)
#load data
usdeathcompared <- read.csv(here('alcoholvsopioid.csv'))
#deleting unnecessary columns
columns_to_delete <- c("measure_id", "measure_name","cause_id", "metric_id","metric_name","age_id","sex_id","sex_name","age_name","upper","lower", "location_name")
usdeathcompared <- usdeathcompared[, !(names(usdeathcompared) %in% columns_to_delete)]
#renaming columns
##val change to rate_deaths
colnames(usdeathcompared)[colnames(usdeathcompared)=="val"] <-"rate_death"
view(usdeathcompared)
#make line graph
##make wide data
usdeath<- usdeathcompared %>%
pivot_wider(names_from = cause_name, values_from = rate_death)
##change column names
colnames(usdeath)[colnames(usdeath)=="Alcohol use disorders"] <-"Alcohol"
colnames(usdeath)[colnames(usdeath)=="Opioid use disorders"] <-"Opioid"
##plot data
alcoholvsopioid <- ggplot(usdeath, aes(x = year)) +
geom_line(aes(y = Alcohol, color = factor("Alcohol")), linetype = "solid", size = 1.65) +
geom_line(aes(y = Opioid, color = factor("Opioid")), linetype = "solid", size = 1.65) +
scale_color_manual(name = "Cause", values = c("Alcohol" = "dodgerblue", "Opioid" = "firebrick")) +
labs(title = "Alcohol- and Opioid Use-Related Death Rates",
subtitle = "US Data from 1990-2019",
caption = "Source: Institute for Health Metrics and Evaluation",
x = "Year",
y = "Death Rate per 100,000",
color = "Cause") +
scale_y_continuous(limits = c(0, max(usdeath$Alcohol, usdeath$Opioid) + 1)) +
theme_minimal() +
theme(plot.title = element_text(color = "#0099f9", size = 20, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 13, face = "bold", hjust = 0.5),
plot.caption = element_text(face = "italic", hjust = 0)) +
theme(axis.title.x = element_text(color = "#0099f9", size = 16, face = "bold"),
axis.title.y = element_text(color = "#0099f9", size = 16, face = "italic"))
print(alcoholvsopioid)

##Interactive Plot Next I decided I wanted to make an interactive
plot for the main 4 drug related deaths in the US
library(here)
library(tidyverse)
library(readr)
library(gganimate)
library(plotly)
usdrugdeaths <- read.csv(here('alldrugsus.csv'))
#deleting unnessecary columns
columns_to_delete <- c("measure_id", "measure_name","cause_id", "metric_id","metric_name","age_id","sex_id","sex_name","age_name","upper","lower", "location_name")
usdrugdeaths <- usdrugdeaths[, !(names(usdrugdeaths) %in% columns_to_delete)]
#renaming columns
##val change to rate_deaths
colnames(usdrugdeaths)[colnames(usdrugdeaths)=="val"] <-"rate_death"
#make wide data
usdrugdeaths<- usdrugdeaths %>%
pivot_wider(names_from = cause_name, values_from = rate_death)
#change column names
colnames(usdrugdeaths)[colnames(usdrugdeaths)=="Alcohol use disorders"] <-"Alcohol"
colnames(usdrugdeaths)[colnames(usdrugdeaths)=="Opioid use disorders"] <-"Opioid"
colnames(usdrugdeaths)[colnames(usdrugdeaths)=="Cocaine use disorders"] <-"Cocaine"
colnames(usdrugdeaths)[colnames(usdrugdeaths)=="Amphetamine use disorders"] <-"Amphetamine"
#make line graph with all 4 drugs
drugdeaths <- ggplot(usdrugdeaths, aes(x = year)) +
geom_line(aes(y = Alcohol, color = factor("Alcohol")), linetype = "solid", size = 1.65) +
geom_line(aes(y = Opioid, color = factor("Opioid")), linetype = "solid", size = 1.65) +
geom_line(aes(y = Cocaine, color = factor("Cocaine")), linetype = "solid", size = 1.65) +
geom_line(aes(y = Amphetamine, color = factor("Amphetamine")), linetype = "solid", size = 1.65) +
scale_color_manual(name = "Cause", values = c("Alcohol" = "dodgerblue", "Opioid" = "firebrick", "Cocaine"= "darkgreen", "Amphetamine"="purple")) +
labs(title = "Drug-related Deaths in the US",
subtitle = "Data from 1990-2019",
caption = "Source: Institute for Health Metrics and Evaluation",
x = "Year",
y = "Death Rate per 100,000",
color = "Cause") +
scale_y_continuous(limits = c(0, max(usdrugdeaths$Alcohol, usdrugdeaths$Opioid, usdrugdeaths$Cocaine, usdrugdeaths$Amphetamine) + 1)) +
theme_minimal() +
theme(plot.title = element_text(color = "#0099f9", size = 20, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 13, face = "bold", hjust = 0.5),
plot.caption = element_text(face = "italic", hjust = 0)) +
theme(axis.title.x = element_text(color = "#0099f9", size = 16, face = "bold"),
axis.title.y = element_text(color = "#0099f9", size = 16, face = "italic"))
#print none interactive plot
print(drugdeaths)

# Convert ggplot to plotly
interactive_plot <- ggplotly(drugdeaths)
#save interactive plot
library(htmlwidgets)
# Save the interactive plot as an HTML file
saveWidget(interactive_plot, "interactive_plot.html")
# View the interactive plot (can be exported as HTML)
htmltools::includeHTML("interactive_plot.html")
plotly
#Making Maps For my final plot I wanted to make an animated plot of
the changes in opioid death by US state across 20 years. Initally I
created a gif that circles through the 30 years of data. Then I decided
to make a more interactive plot whereby individuals could look at
specific years in more detail
#load up relevant packages
library(here)
library(tidyverse)
library(readr)
library(ggplot2)
# read and open data
## save data file as df
df <- read.csv(here('rateofdeath.csv'))
#exploring data
head(df)
## measure_id measure_name location_id location_name sex_id sex_name age_id
## 1 1 Deaths 570 Washington 3 Both 22
## 2 1 Deaths 523 Alabama 3 Both 22
## 3 1 Deaths 539 Kansas 3 Both 22
## 4 1 Deaths 571 West Virginia 3 Both 22
## 5 1 Deaths 555 New York 3 Both 22
## 6 1 Deaths 534 Hawaii 3 Both 22
## age_name cause_id cause_name metric_id metric_name year val
## 1 All ages 562 Opioid use disorders 3 Rate 1990 1.8782698
## 2 All ages 562 Opioid use disorders 3 Rate 1990 0.8859001
## 3 All ages 562 Opioid use disorders 3 Rate 1990 0.8549468
## 4 All ages 562 Opioid use disorders 3 Rate 1990 1.3670257
## 5 All ages 562 Opioid use disorders 3 Rate 1990 2.7087565
## 6 All ages 562 Opioid use disorders 3 Rate 1992 1.3763670
## upper lower
## 1 2.1146290 1.6561179
## 2 0.9941986 0.7878020
## 3 0.9640723 0.7650459
## 4 1.5184300 1.2275772
## 5 3.0503677 2.3778905
## 6 1.5490940 1.2099410
summary(df)
## measure_id measure_name location_id location_name sex_id
## Min. :1 Length:1530 Min. :523 Length:1530 Min. :3
## 1st Qu.:1 Class :character 1st Qu.:535 Class :character 1st Qu.:3
## Median :1 Mode :character Median :548 Mode :character Median :3
## Mean :1 Mean :548 Mean :3
## 3rd Qu.:1 3rd Qu.:561 3rd Qu.:3
## Max. :1 Max. :573 Max. :3
## sex_name age_id age_name cause_id
## Length:1530 Min. :22 Length:1530 Min. :562
## Class :character 1st Qu.:22 Class :character 1st Qu.:562
## Mode :character Median :22 Mode :character Median :562
## Mean :22 Mean :562
## 3rd Qu.:22 3rd Qu.:562
## Max. :22 Max. :562
## cause_name metric_id metric_name year
## Length:1530 Min. :3 Length:1530 Min. :1990
## Class :character 1st Qu.:3 Class :character 1st Qu.:1997
## Mode :character Median :3 Mode :character Median :2004
## Mean :3 Mean :2004
## 3rd Qu.:3 3rd Qu.:2012
## Max. :3 Max. :2019
## val upper lower
## Min. : 0.4718 Min. : 0.5292 Min. : 0.4182
## 1st Qu.: 2.2725 1st Qu.: 2.5425 1st Qu.: 2.0236
## Median : 5.0473 Median : 5.6695 Median : 4.5100
## Mean : 6.2978 Mean : 7.1181 Mean : 5.5586
## 3rd Qu.: 8.5806 3rd Qu.: 9.5911 3rd Qu.: 7.6202
## Max. :38.4191 Max. :47.7953 Max. :31.5799
str(df)
## 'data.frame': 1530 obs. of 16 variables:
## $ measure_id : int 1 1 1 1 1 1 1 1 1 1 ...
## $ measure_name : chr "Deaths" "Deaths" "Deaths" "Deaths" ...
## $ location_id : int 570 523 539 571 555 534 553 572 552 550 ...
## $ location_name: chr "Washington" "Alabama" "Kansas" "West Virginia" ...
## $ sex_id : int 3 3 3 3 3 3 3 3 3 3 ...
## $ sex_name : chr "Both" "Both" "Both" "Both" ...
## $ age_id : int 22 22 22 22 22 22 22 22 22 22 ...
## $ age_name : chr "All ages" "All ages" "All ages" "All ages" ...
## $ cause_id : int 562 562 562 562 562 562 562 562 562 562 ...
## $ cause_name : chr "Opioid use disorders" "Opioid use disorders" "Opioid use disorders" "Opioid use disorders" ...
## $ metric_id : int 3 3 3 3 3 3 3 3 3 3 ...
## $ metric_name : chr "Rate" "Rate" "Rate" "Rate" ...
## $ year : int 1990 1990 1990 1990 1990 1992 1991 1990 1991 1992 ...
## $ val : num 1.878 0.886 0.855 1.367 2.709 ...
## $ upper : num 2.115 0.994 0.964 1.518 3.05 ...
## $ lower : num 1.656 0.788 0.765 1.228 2.378 ...
#deleting unnessecary columns
columns_to_delete <- c("measure_id", "measure_name","cause_id", "metric_id","cause_name","metric_name","age_id","sex_id","sex_name","age_name","upper","lower")
df <- df[, !(names(df) %in% columns_to_delete)]
#renaming collumns
##location_name change to state
colnames(df)[colnames(df) == "location_name"] <- "state"
##val change to number_deaths
colnames(df)[colnames(df)=="val"] <-"death_rate"
#change to lower case
df$state <- tolower(df$state)
df <- df %>% mutate(number_deaths = as.numeric(death_rate))
#subset data set by year
##1990
desired_year1990 <- 1990
year1990 <- df[which(df$year == desired_year1990),]
##1991
desired_year1991 <- 1991
year1991 <- df[which(df$year == desired_year1991),]
##1992
desired_year1992 <- 1992
year1992 <- df[which(df$year == desired_year1992),]
##1993
desired_year1993 <- 1993
year1993 <- df[which(df$year == desired_year1993),]
##1994
desired_year1994 <- 1994
year1994 <- df[which(df$year == desired_year1994),]
##1995
desired_year1995 <- 1995
year1995 <- df[which(df$year == desired_year1995),]
##1996
desired_year1996 <- 1996
year1996 <- df[which(df$year == desired_year1996),]
##1997
desired_year1997 <- 1997
year1997 <- df[which(df$year == desired_year1997),]
##1998
desired_year1998 <- 1998
year1998 <- df[which(df$year == desired_year1998),]
##1999
desired_year1999 <- 1999
year1999 <- df[which(df$year == desired_year1999),]
##2000
desired_year2000 <- 2000
year2000 <- df[which(df$year == desired_year2000),]
##2001
desired_year2001 <- 2001
year2001 <- df[which(df$year == desired_year2001),]
##2002
desired_year2002 <- 2002
year2002 <- df[which(df$year == desired_year2002),]
##2003
desired_year2003 <- 2003
year2003 <- df[which(df$year == desired_year2003),]
##2004
desired_year2004 <- 2004
year2004 <- df[which(df$year == desired_year2004),]
##2005
desired_year2005 <- 2005
year2005 <- df[which(df$year == desired_year2005),]
##2006
desired_year2006 <- 2006
year2006 <- df[which(df$year == desired_year2006),]
##2007
desired_year2007 <- 2007
year2007 <- df[which(df$year == desired_year2007),]
##2008
desired_year2008 <- 2008
year2008 <- df[which(df$year == desired_year2008),]
##2009
desired_year2009 <- 2009
year2009 <- df[which(df$year == desired_year2009),]
##2010
desired_year2010 <- 2010
year2010 <- df[which(df$year == desired_year2010),]
##2011
desired_year2011 <- 2011
year2011 <- df[which(df$year == desired_year2011),]
##2012
desired_year2012 <- 2012
year2012 <- df[which(df$year == desired_year2012),]
##2013
desired_year2013 <- 2013
year2013 <- df[which(df$year == desired_year2013),]
##2014
desired_year2014 <- 2014
year2014 <- df[which(df$year == desired_year2014),]
##2015
desired_year2015 <- 2015
year2015 <- df[which(df$year == desired_year2015),]
##2016
desired_year2016 <- 2016
year2016 <- df[which(df$year == desired_year2016),]
##2017
desired_year2017 <- 2017
year2017 <- df[which(df$year == desired_year2017),]
##2018
desired_year2018 <- 2018
year2018 <- df[which(df$year == desired_year2018),]
##2019
desired_year2019 <- 2019
year2019 <- df[which(df$year == desired_year2019),]
# could make maps for every year, however these would all have their individual
#scales for death rate and therefore the colour changing would only represent change for that specific year and not across the years
library(maps)
us_states <- map_data("state")
head(us_states)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
#change region to state in map data
colnames(us_states)[colnames(us_states) == "region"] <- "state"
#create map for 1990
p1990 <- ggplot(data = us_states,
mapping = aes(x = long, y = lat,
group = group))
p1990 + geom_polygon(fill = "white", color = "black")

p1990 <- ggplot(data = us_states,
aes(x = long, y = lat,
group = group, fill = state))
p1990 + geom_polygon(color = "gray90", size = 0.1) + guides(fill = FALSE)

#align map to correct latitude and longitude
p1990 <- ggplot(data = us_states,
mapping = aes(x = long, y = lat,
group = group, fill = state))
p1990 + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
guides(fill = FALSE)

#join opioid death data with map data
map1990 <- left_join(year1990, us_states)
#plot opioid death on map data for 1990
p1990 <- ggplot(data = map1990,
aes(x = long, y = lat,
group = group, fill = death_rate))
p1990 + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
scale_fill_gradient(low = "white", high = "darkred", na.value = "white", name = "Death Rate")

#make a loop for all death maps 1990-2019
#keep death rate scale constant through all years to show true change in death rate
overall_scale_limits <- range(df$death_rate, na.rm = TRUE)
generate_and_save_map <- function(current_year) {
current_data <- df %>% filter(year == current_year)
map_data <- left_join(current_data, us_states, by = "state")
p <- ggplot(data = map_data,
aes(x = long, y = lat,
group = group, fill = death_rate)) +
geom_polygon(color = "black", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
scale_fill_gradient(low = "white", high = "red", na.value = "yellow",
name = "Death Rate", limits = overall_scale_limits) +
ggtitle(paste("US Opioid-Use Related Death Rate - Year", current_year)) +
labs(subtitle = "Institute for Health Metrics and Evaluation") +
theme(plot.title = element_text(size = 18, face = "bold"),
plot.subtitle = element_text(size = 14),
panel.grid.major = element_blank(), # Remove major gridlines
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent"))
# Save the plot as an image
ggsave(filename = paste("opioid_death_map_", current_year, ".png", sep = ""), plot = p)
}
# Generate and save frames for each year
lapply(1990:2019, generate_and_save_map)
## [[1]]
## [1] "opioid_death_map_1990.png"
##
## [[2]]
## [1] "opioid_death_map_1991.png"
##
## [[3]]
## [1] "opioid_death_map_1992.png"
##
## [[4]]
## [1] "opioid_death_map_1993.png"
##
## [[5]]
## [1] "opioid_death_map_1994.png"
##
## [[6]]
## [1] "opioid_death_map_1995.png"
##
## [[7]]
## [1] "opioid_death_map_1996.png"
##
## [[8]]
## [1] "opioid_death_map_1997.png"
##
## [[9]]
## [1] "opioid_death_map_1998.png"
##
## [[10]]
## [1] "opioid_death_map_1999.png"
##
## [[11]]
## [1] "opioid_death_map_2000.png"
##
## [[12]]
## [1] "opioid_death_map_2001.png"
##
## [[13]]
## [1] "opioid_death_map_2002.png"
##
## [[14]]
## [1] "opioid_death_map_2003.png"
##
## [[15]]
## [1] "opioid_death_map_2004.png"
##
## [[16]]
## [1] "opioid_death_map_2005.png"
##
## [[17]]
## [1] "opioid_death_map_2006.png"
##
## [[18]]
## [1] "opioid_death_map_2007.png"
##
## [[19]]
## [1] "opioid_death_map_2008.png"
##
## [[20]]
## [1] "opioid_death_map_2009.png"
##
## [[21]]
## [1] "opioid_death_map_2010.png"
##
## [[22]]
## [1] "opioid_death_map_2011.png"
##
## [[23]]
## [1] "opioid_death_map_2012.png"
##
## [[24]]
## [1] "opioid_death_map_2013.png"
##
## [[25]]
## [1] "opioid_death_map_2014.png"
##
## [[26]]
## [1] "opioid_death_map_2015.png"
##
## [[27]]
## [1] "opioid_death_map_2016.png"
##
## [[28]]
## [1] "opioid_death_map_2017.png"
##
## [[29]]
## [1] "opioid_death_map_2018.png"
##
## [[30]]
## [1] "opioid_death_map_2019.png"